The benchmarks in the present analysis rely on our own previous analysis and annotation of papers, as well as on open resources such as Papers With Code, including data from several repositories (e.g., EFF, NLP-progress, SQuAD, RedditSota, etc.).
We focus on “interest” rather than “progress” for AI benchmarks as this is something we can compute using some proxies.
For this analysis we use the normalised hits obtained per benchmark on AItopics over the last decade (2008-2019). Note that the results from 2019 are incomplete.
# Load the per-benchmark interest data: normalised AItopics hits per year,
# plus the benchmark-to-cognitive-ability annotation columns.
interest.df <- read.xlsx2("interest_kw_processed_raw_slope.xlsx", sheetIndex = 1)
# interest.df <- readRDS("interest_kw_processed_raw_slope.rds")

# Columns 21:32 hold the yearly hit counts; label them with the years.
colnames(interest.df)[21:32] <- as.character(2008:2019)

# Derive each benchmark's top-level category from the first element of the
# ">"-separated task hierarchy (e.g. "Computer Vision > Image Classification").
# BUG FIX: the original initialised `interest$category`, but `interest` does
# not exist yet at this point; the column belongs on `interest.df`.
interest.df$category <- NA
for (i in seq_len(nrow(interest.df))) {
  interest.df$category[i] <-
    str_trim(str_split(interest.df$TaskHierarchies[i], pattern = ">")[[1]][1])
}

# Keep the keyword, category, the cognitive-ability columns (5:18) and the
# mean interest score.
interest <- select(
  interest.df,
  all_of(c("keyword", "category", colnames(interest.df)[5:18], "mean.Interest"))
)
keywords <- interest$keyword
categories <- interest$category
rownames(interest) <- keywords
interest <- interest[, -(1:2)]

interest["ILSVRC", "QL"] <- 0 # <------------------- check it!

# Everything came in as text from the spreadsheet; infer proper column types.
# (type.convert's default as.is = FALSE turns character columns into factors.)
interest <- colwise(type.convert)(interest)
rownames(interest) <- keywords # colwise() drops row names; restore them

# Avoid exactly-zero weights downstream (edge widths / ratios): nudge zero
# mean-interest values to a tiny epsilon.
interest[interest$mean.Interest == 0, "mean.Interest"] <- 1e-13
interest.mean <- interest$mean.Interest
interest <- select(interest, -mean.Interest)

# Weight each benchmark's ability profile by its mean interest.
interest.pond <- interest * interest.mean

barplot(colSums(interest))
barplot(colSums(interest.pond))
knitr::kable(list(data.frame(colSums(interest)), data.frame(colSums(interest.pond))))
|
|
Mapping between AI benchmarks and cognitive abilities:
Note that we can perform exactly the same analysis focusing on different (ranges of) years and obtaining the same graph but the width of the edges may vary.
(The network is interactive!)
# Bipartite benchmark-by-cognitive-ability network, edge weights given by
# interest-weighted annotations. One colour per category (13 groups).
colours <- c("1" = "black", "2" = "#543005", "3" = "#8c510a", "4" = "#bf812d",
             "5" = "#dfc27d", "6" = "#f6e8c3", "7" = "#f5f5f5", "8" = "#c7eae5",
             "9" = "#80cdc1", "10" = "#35978f", "11" = "#01665e",
             "12" = "#003c30", "13" = "#FAFAFA")

# Build the bipartite graph from the weighted incidence matrix.
# NOTE(review): graph_from_incidence_matrix() is deprecated in igraph >= 1.6
# in favour of graph_from_biadjacency_matrix(); kept here for compatibility
# with the igraph version this project was written against.
vis <- toVisNetworkData(
  graph_from_incidence_matrix(interest.pond, directed = FALSE, weighted = TRUE)
)

# Benchmark nodes get a fixed size; the final 14 nodes are the cognitive
# abilities (columns 5:18 of the source data), scaled by the total
# interest-weighted attention they receive.
vis$nodes$value <- c(rep(10, nrow(vis$nodes) - 14), colSums(interest.pond) * 10000)
vis$nodes$title <- vis$nodes$label # hover tooltip text
vis$nodes$category <- c(categories, rep("CogAb", 14))
vis$nodes$group <- vis$nodes$category
vis$nodes$color <- colours[as.numeric(as.factor(vis$nodes$category))]
vis$edges$value <- vis$edges$weight * 100 # edge width ~ weighted interest

visNetwork(vis$nodes, vis$edges, height = "1000px", width = "100%") %>%
  visEdges(arrows = "to",
           color = list(color = 'rgba(70,130,180,0.3)', highlight = "#4682B4")) %>%
  visIgraphLayout(
    physics = FALSE,
    randomSeed = 2017, # fixed seed so the layout is reproducible
    layout = "layout_with_fr"
  ) %>%
  visInteraction(navigationButtons = TRUE) %>%
  visOptions(selectedBy = "group", highlightNearest = TRUE)
(Groups from https://paperswithcode.com/)
# Per-category interest plots; category names follow the Papers With Code
# taxonomy (https://paperswithcode.com/).
# NOTE(review): plotIterest.Cat ("Iterest" sic) and interest.m are defined
# elsewhere in the project; presumably interest.m is a matrix/long form of
# the interest data built above — confirm against the sourced helpers.
plotIterest.Cat(interest.m, "Computer Vision")
plotIterest.Cat(interest.m, "Graphs")
plotIterest.Cat(interest.m, "Natural Language Processing")
plotIterest.Cat(interest.m, "Playing Games")
plotIterest.Cat(interest.m, "Miscellaneous")
plotIterest.Cat(interest.m, "Medical")
plotIterest.Cat(interest.m, "Methodology")
plotIterest.Cat(interest.m, "Speech")
plotIterest.Cat(interest.m, "Reasoning")
plotIterest.Cat(interest.m, "Time Series")
plotIterest.Cat(interest.m, "Computer Code")